In [1]:
    
% reset -f
from __future__ import print_function
from __future__ import division
import math
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
    
In [2]:
    
import torch
import sys
print('__Python VERSION:', sys.version)
print('__pyTorch VERSION:', torch.__version__)
print('__CUDA VERSION')
from subprocess import call
# call(["nvcc", "--version"]) does not work
! nvcc --version
print('__CUDNN VERSION:', torch.backends.cudnn.version())
print('__Number CUDA Devices:', torch.cuda.device_count())
print('__Devices')
call(["nvidia-smi", "--format=csv", "--query-gpu=index,name,driver_version,memory.total,memory.used,memory.free"])
print('Active CUDA Device: GPU', torch.cuda.current_device())
print ('Available devices ', torch.cuda.device_count())
print ('Current cuda device ', torch.cuda.current_device())
    
    
In [3]:
    
# Demonstrate creating a tensor and moving it to the GPU when one is present.
# torch.Tensor(3, 4) returns an *uninitialized* tensor (arbitrary memory, may
# contain NaN/garbage); torch.empty is the modern, explicit spelling of that.
x = torch.empty(3, 4)
if torch.cuda.is_available():
    x = x.cuda() * 2  # move to the current CUDA device, then scale
print(type(x))
print(x)
    
    
In [13]:
    
import numpy as np
import torch.cuda as cu
import contextlib
import time
# Uninitialized 1-element tensor created directly on the current CUDA device
# (device 0 unless changed elsewhere — not "GPU 1" as the old comment said).
# Not referenced below; presumably serves to warm up the CUDA context before
# the timings — TODO confirm.
a = torch.cuda.FloatTensor(1)
# Same-sized tensor built on the CPU, then copied to the current CUDA device.
b = torch.FloatTensor(1).cuda()
    
# Timing helper with CUDA synchronization
@contextlib.contextmanager
def timing(name):
    """Print the wall-clock duration of the enclosed block, labelled `name`.

    CUDA kernel launches are asynchronous, so we synchronize before starting
    the clock (to drain pending work) and again before stopping it (to wait
    for kernels launched inside the block).
    """
    cu.synchronize()
    t0 = time.time()
    yield
    cu.synchronize()
    print('{} {:6.3f} seconds'.format(name, time.time() - t0))
    
    
# Compare four ways of getting a float tensor of each shape onto the GPU.
for shape in [(128**3,), (128,128**2), (128,128,128), (32,32,32,64)]:
    print('shape {}, {:.1f} MB'.format(shape, np.zeros(shape).nbytes/1024.**2))
    with timing('from_numpy sent to GPU     '): torch.from_numpy(np.zeros(shape)).cuda()
    with timing('CPU constructor            '): torch.FloatTensor(np.zeros(shape))
    with timing('CPU constructor sent to GPU'): torch.FloatTensor(np.zeros(shape)).cuda()
    with timing('GPU constructor            '): cu.FloatTensor(np.zeros(shape))
    # The original bare `print` was a no-op: with `from __future__ import
    # print_function` active, it merely evaluates the function object without
    # calling it. `print()` actually emits the intended blank separator line.
    print()
    
    
In [ ]: